/**********************************************************************/
/*
   Title: clean_susenas_finance_mar19.do
   Author: Clotaire Boyer, Robbie Dulin
   Created: 23 Feb 2021
   Description: Checks, cleans, and creates outcome variables in raw
		 March 2019 SUSENAS data.
*/
/**********************************************************************/

/*----------------------------------------------------*/
                /* Section: Setup */
/*----------------------------------------------------*/

version 14.2
clear
set more off

global randomization                "$dropboxbase/04_ResearchDesign/02 Randomization/1705 Kab Randomization v6 - 8.2 milion, treat most java - FINAL"

cap log close
local prefix: display %tdCYND td(`c(current_date)')
log using "$log/`prefix'_clean_susenas_finance_mar19", replace text



/*----------------------------------------------------*/
                /* Section: Blok 5 & 6 */
/*----------------------------------------------------*/

** Block 5 a

	u "$importable/2019_03_BLOK5A.dta", clear

rename renum RENUM

gen ex1 = 1

collapse (sum) ex , by(RENUM)


tempfile blok5a
save `blok5a'

** Block 5 b

u "$importable/2019_03_BLOK5B.dta", clear

rename renum RENUM

gen nnagric_income = 0
replace nnagric_income = b5bk6 if b5bk3 > 6 & b5bk3 < 99
// summ nnagric_income

gen foodaccom_income = 0
replace foodaccom_income = b5bk6 if b5bk3 == 14

gen trans_income = 0
replace foodaccom_income = b5bk6 if b5bk3 == 13

gen retail_income = 0
replace retail_income = b5bk6 if b5bk3 == 12

gen manuf_income = 0
replace retail_income = b5bk6 if b5bk3 == 9

collapse (sum) nnagric_income foodaccom_income ///
trans_income retail_income manuf_income, by(RENUM)

// summ nnagric_income

	tempfile blok5b
	save `blok5b'


** Block 6 and other parts of 5

	u "$importable/2019_03_BLOK5_RT.dta", clear

	rename renum RENUM

	rename b6r2k2 hh_busi_income

	rename b6r6 hh_savings

	merge 1:1 RENUM using `blok5b'

				// we can recode conditionality as 0 for non existant businesses incomes

	foreach var in nnagric_income foodaccom_income trans_income retail_income manuf_income {
    replace `var' = 0 if hh_busi_income == 0 // or if _merge == 1 perfectly similar
}
		drop _m

keep *_income hh_* RENUM

	//	merge 1:1 RENUM using `blok5a'
	//	drop _m

	tempfile blok6
	save `blok6'




/*----------------------------------------------------*/
                /* Section: Blok 42 */
/*----------------------------------------------------*/

** financial transaction fees
	u "$importable/blok42_mar19.dta", clear
	keep if KODE == 268 | KODE == 287
	gen fin_trans_fees = B42K5 if KODE == 268
	// summ fin_trans_fees

** cell phone/smartphone spending (purchase, repairs, accessories)
	gen cell_phone_spend = B42K5 if KODE == 287
	// summ cell_phone_spend

** collapse to HH level
	collapse (sum) fin_trans_fees cell_phone_spend, by(RENUM)
	// summ fin_trans_fees if fin_trans_fees > 0
	// summ cell_phone_spend if cell_phone_spend > 0

* save
	tempfile blok42
	save `blok42'

/*----------------------------------------------------*/
            /* Section: Individual Module */
/*----------------------------------------------------*/

u "${importable}/kor19ind.dta", clear

** KB card

/// tab  R1604
gen KB_ind = R1604
replace KB_ind = 0 if R1604 == 5
// summ KB_ind


** hours worked
	/// tab  R701_A
	//tab  R703
	// summ  R707
	// //tab  R707
	gen hours_worked = R707

** student
	//tab  R701_B
	gen student = R701_B == "B"

** savings account
	//tab  R808
	recode R808 (5 = 0), gen(savings_ind)
	// gen savings_ind_miss = savings_ind == .
	// tab R407 savings_ind_miss
	// all missings are for children < 5

** Online banking
	//tab  R807_H
	gen online_banking_ind = R807_H == "H"
	//tab  online_banking_ind

** Online purchasing
	//tab  R807_E
	gen online_buy_ind = R807_E == "E"
	//tab  online_buy_ind

** Online selling
	//tab  R807_F
	gen online_sell_ind = R807_F == "F"
	//tab  online_sell_ind

** female head of household
	//tab  R403 R405 if R403 == 1
	gen female_KRT = R403 == 1 & R405 == 2 if R403 == 1
	// summ female_KRT

** head of household is widow
	//tab  female_KRT R404
	gen widow = female_KRT == 1 & R404 == 4
	//tab  widow

** disruptive health issue
	//tab  R1102
	/// tab  R1103
	gen health_complaints_ind = R1103 == 1
	// summ health_complaints_ind

** disabilities
	* Vision
	/// tab  R1002
	gen vision_dis_sev = R1002 == 1 | R1002 == 2 if R1002 != .
	// summ vision_dis_sev
	gen vision_dis_mild = R1002 == 1 | R1002 == 2 | R1002 == 3 if R1002 != .
	// summ vision_dis_mild

	* Hearing
	//tab  R1003
	gen hearing_dis_sev = R1003 == 5 | R1003 == 6 if R1003 != .
	// summ hearing_dis_sev
	gen hearing_dis_mild = R1003 == 5 | R1003 == 6 | R1003 == 7 if R1003 != .
	// summ hearing_dis_mild

	* walking
	//tab  R1004
	gen walking_dis_sev = R1004 == 1 | R1004 == 2 if R1004 != .
	// summ walking_dis_sev
	gen walking_dis_mild = R1004 == 1 | R1004 == 2 | R1004 == 3 if R1004 != .
	// summ walking_dis_mild

	* hand movement
	//tab  R1005
	gen hand_dis_sev = R1005 == 5 | R1005 == 6 if R1005 != .
	// summ hand_dis_sev
	gen hand_dis_mild = R1005 == 5 | R1005 == 6 | R1005 == 7 if R1005 != .
	// summ hand_dis_mild

	* memory/concentration
	//tab  R1006
	gen memory_dis_sev = R1006 == 1 | R1006 == 2 if R1006 != .
	// summ memory_dis_sev
	gen memory_dis_mild = R1006 == 1 | R1006 == 2 | R1006 == 3 if R1006 != .
	// summ memory_dis_mild

	* behavioral disorder
	//tab  R1007
	gen behav_dis_sev = R1007 == 5 | R1007 == 6 if R1007 != .
	// summ behav_dis_sev
	gen behav_dis_mild = R1007 == 5 | R1007 == 6 | R1007 == 7 if R1007 != .
	// summ behav_dis_mild

	* Speech disorder
	//tab  R1008
	gen speech_dis_sev = R1008 == 1 | R1008 == 2 if R1008 != .
	// summ speech_dis_sev
	gen speech_dis_mild = R1008 == 1 | R1008 == 2 | R1008 == 3 if R1008 != .
	// summ speech_dis_mild

	* Unable to take care of oneself
	//tab  R1009
	gen take_care_dis_sev = R1009 == 5 | R1009 == 6 if R1009 != .
	// summ take_care_dis_sev
	gen take_care_dis_mild = R1009 == 5 | R1009 == 6 | R1009 ==7 if R1009 != .
	// summ take_care_dis_mild

	* any disability (individual)
	egen tot_disability = rowtotal(*dis*)
	gen any_disability = tot_disability >= 1
	//tab  any_disability

	* any severe disability (individual)
	egen tot_sev_disability = rowtotal(*dis_sev)
	gen any_sev_disability = tot_sev_disability >= 1
	//tab  any_sev_disability
	drop tot_disability tot_sev_disability

	* head of household disability
	gen hh_head = R403 == 1
	gen any_dis_KRT = any_disability == 1 & hh_head == 1
	//tab  any_dis_KRT if hh_head == 1

	* head of household severe disability
	gen any_sev_dis_KRT = any_sev_disability == 1 & hh_head == 1
	//tab  any_sev_dis_KRT if hh_head == 1

** Head of household education levels
	//tab  R613
	// School classification descriptions from: https://mikrodata.bps.go.id/mikrodata/index.php/catalog/814/datafile/F1#page=F4&//tab =data-dictionary

	* Elementary school
	// dummy if hh member has elementary edu or equivalent: (paket A, SDLB, SD, MI)
	gen elementary = inlist(R613, 1, 2, 3, 4) if R403 == 1

	* Junior high school
	// dummy if hh member has junior high edu or equivalent: (paket B, SMPLB, SMP, MTs)
	gen junior_high = inlist(R613, 5, 6, 7, 8) if R403 == 1

	* High school
	// dummy if hh member has high school edu or equivalent, or vocational school: (paket C, SMLB, SMA, MA, SMK, MAK)
	gen high_school = inlist(R613, 9, 10, 11, 12, 13, 14) if R403 == 1

	* Tertiary edu
	// dummy if hh member has tertiary edu: (D1/D2, D3, D4, S1, S2, S3)
	gen tertiary = inlist(R613, 15, 16, 17, 18, 19, 20, 21) if R403 == 1
	// summ elementary junior_high high_school tertiary


*** Non HH head edu
	* Elementary school
	//tab  R615
	// dummy if hh member has elementary edu or equivalent: (paket A, SDLB, SD, MI)
	gen elementary_edu = inlist(R615, 2, 3, 4, 5)

	* Junior high school
	// dummy if hh member has junior high edu or equivalent: (paket B, SMPLB, SMP, MTs)
	gen junior_high_edu = inlist(R615, 6, 7, 8, 9)

	* High school
	// dummy if hh member has high school edu or equivalent, or vocational school: (paket C, SMLB, SMA, MA, SMK, MAK)
	gen high_school_edu = inlist(R615, 10, 11, 12, 13, 14, 15)

	* Tertiary edu
	// dummy if hh member has tertiary edu: (D1/D2, D3, D4, S1, S2, S3)
	gen tertiary_edu = inlist(R615, 16, 17, 18, 19, 20, 21, 22)
	// summ elementary junior_high high_school tertiary

** kids
	// summ R407
	gen kid = R407 < 18
	// summ R407 if kid == 1

** adults
	gen adult = R407 >= 18
	// summ adult
	gen age = R407

** eduation levels
	gen high_edu = R613 >= 9 if R613 != .

** gender
	//tab  R405
	gen female = R405 == 2

** older vs. younger
	gen under30 = R407 < 30
	gen under40 = R407 < 40

** cell phone
	//tab  R802
	recode R802 (5 = 0), gen(cell_phone)

** internet usage
	//tab  R804
	recode R804 (5 = 0), gen(internet_use)

	*  Recovering the numnber of small buisnesses in SUESENA 2019

	//tab  R705
	gen smallbui_sus19_type1_ind = R705 == 1
	replace smallbui_sus19_type1_ind = 0 if R704 <= 6 // removing farmers
	replace smallbui_sus19_type1_ind = 0 if R704 == 25
	// summ smallbui_sus19_type1_ind

	gen smallbui_sus19_type2_ind = R705 == 2
	replace smallbui_sus19_type2_ind = 0 if R704 <= 6
	replace smallbui_sus19_type2_ind = 0 if R704 == 25
	// summ smallbui_sus19_type2_ind

	gen smallbui_sus19_type3_ind = R705 == 3
	replace smallbui_sus19_type3_ind = 0 if R704 <= 6
	replace smallbui_sus19_type3_ind = 0 if R704 == 25
	// summ smallbui_sus19_type3_ind

	gen smallbui_sus19_all_ind = smallbui_sus19_type1_ind
	replace smallbui_sus19_all_ind = 1 if smallbui_sus19_type2_ind == 1
	replace smallbui_sus19_all_ind = 1 if smallbui_sus19_type2_ind == 1
	// summ smallbui_sus19_all_ind

gen bus_nagr = (R704 > 6 & R704 < 999)
gen bus_ret = (R704 == 12) // retail
gen bus_food = (R704 == 14) // Food accom
gen bus_manuf = (R704 == 8) // Manufacturing
gen bus_other = (R704 == 7 ///
	| R704 ==  9 | R704 ==  11 | R704 ==  15 | R704 ==  16 | R704 ==  17 ///
	| R704 ==  18 | R704 ==  19 | R704 ==  20 | R704 ==  21 | R704 ==  22 ///
	| R704 ==  23 | R704 ==  24 | R704 ==  25 | R704 ==  26) // Other
gen bus_trans = (R704 == 13) // Transportaion
gen bus_agr = (R704 == 2 | R704 == 3 | R704 == 4 | R704 == 5) // Agric not farming


** save individual level dataset (will come back to it at the end)
	preserve
	keep KB_ind smallbui_sus19_all_ind smallbui_sus19_type1_ind smallbui_sus19_type2_ind smallbui_sus19_type3_ind ///
	RENUM R101 R102 R105 R40? bus_ret bus_nagr bus_food bus_manuf bus_other bus_trans bus_agr ///
	savings_ind online* adult high_edu female under* cell_phone internet_use FWT *edu hours_worked age student kid widow
	tempfile ind_dataset
	qui save `ind_dataset'
	restore

** collapse total by household
	collapse (sum) savings_ind online* female_KRT widow num_kids=kid elementary junior_high high_school tertiary ///
	KB_ind smallbui_sus19_all_ind smallbui_sus19_type1_ind smallbui_sus19_type2_ind smallbui_sus19_type3_ind ///
		internet_use cell_phone health_complaints = health_complaints_ind *_dis_* , by(RENUM)

** HH level variables
	* internet use
	// summ internet_use
	gen internet_use_hh = internet_use > 0
	drop internet_use
	rename internet_use_hh internet_use
	//tab  internet_use

	* cell phone ownership
	// summ cell_phone
	gen cell_phone_hh = cell_phone > 0
	drop cell_phone
	rename cell_phone_hh cell_phone
	//tab  cell_phone

	* single woman with kids
	gen single_mom = female_KRT == 1 & num_kids > 0
	//tab  single_mom

	* Savings account = 1 if at least one hh member has savings account
	* Savings account = 1 if at least one hh member has savings account  MISTAKE //!\\
	gen savings_account = (savings_ind >= 1 & savings_ind < 99)
	drop savings_ind

	* KB  = 1 if at least one hh member has KB
	gen KB = KB_ind >= 1
	drop KB_ind

	* Small buism variables = 1 if at least one hh member is part of such business


	gen smallbui_sus19_all = (smallbui_sus19_all_ind >= 1) if smallbui_sus19_all_ind<.



	gen smallbui_sus19_type1 = (smallbui_sus19_type1_ind >= 1) if smallbui_sus19_type1_ind<.
	drop smallbui_sus19_type1_ind

	gen smallbui_sus19_type2 = (smallbui_sus19_type2_ind >= 1) if smallbui_sus19_type2_ind<.
	drop smallbui_sus19_type2_ind

	gen smallbui_sus19_type3 = (smallbui_sus19_type3_ind >= 1) if smallbui_sus19_type3_ind<.
	drop smallbui_sus19_type3_ind


	* online banking
	gen online_banking = online_banking_ind >= 1
	//tab  online_banking
	drop online_banking_ind

	* online buying
	gen online_buy = online_buy_ind >= 1
	//tab  online_buy
	drop online_buy_ind

	* online selling
	gen online_sell = online_sell_ind >= 1
	//tab  online_sell
	drop online_sell_ind

	* anyone in hh with health complaint
	gen health_complaints_hh = health_complaints > 0 & health_complaints != .
	// summ health_complaints_hh
	drop health_complaints

	* dummy for any severe/mild disability
	// any disability at all
	egen any_disability_hh = rowtotal(*_dis_*)
	gen any_disability = any_disability_hh >= 1
	// summ any_disability
	drop any_disability_hh

	// severe disability
	egen severe_disabilty_hh = rowtotal(*dis_sev)
	gen severe_disabilty = severe_disabilty_hh >= 1
	// summ severe_disabilty
	drop severe_disabilty_hh
	drop *_dis_mild* *_dis_sev*

	// summ
	tempfile ind
	save `ind'

/*----------------------------------------------------*/
            /* Section: Merge Datasets */
/*----------------------------------------------------*/

** Merge SUSENAS modules into household module
	u "$importable/kor19rt.dta", clear

	merge 1:1 RENUM using `ind'
	drop _m

	merge 1:1 RENUM using `blok42'
	drop _m

	merge 1:1 RENUM using `blok6'
	drop _m

** Create kabupaten identifier
	tostring R101, gen(R101S)
	tostring R102, gen(R102S)
	gen KABU = R101S + "0" + R102S if length(R102S) == 1
	replace KABU = R101S + R102S if length(R102S) > 1
	assert length(KABU) == 4
	destring KABU, replace
	drop R101S R102S

** Merge in kabupaten names (treatment and control districts identified by kabu name, so need name to merge)
	preserve
	import excel "${importable}/province_names.xlsx", firstrow clear
	rename KodeProvinsi R101
	rename KodeKabupaten R102
	destring R101, replace
	destring R102, replace
	rename NamaKabupaten namakabupaten
	rename NamaProvinsi provinsi
	tempfile kabu_names
	save `kabu_names'
	restore

	merge  m:1 R101 R102 using "`kabu_names'", assert(3) nogen


*** Merge in treatment assignments
	* load treatment and control status dataset
	preserve
	u "$randomization/treatments.dta", clear
	keep namaprovinsi namakabupaten treated finalstratum
	rename namaprovinsi provinsi
	keep if treated != .

	* kota/kab variable to distinguish between districts with same name in same province
	/* I identify which districts are experimental using ⁨05_Intervention⁩/01 Impact Evaluation⁩/20190523_Kab Kota Phase Expansion.xlsx,
			which distinguishes between kota and kab for same-name districts. I assign kab status to these
			districts to match the phase sheet. */
	gen kab = 1 if inlist(namakabupaten, "BIMA", "GORONTALO", "PEKALONGAN", "SERANG")

	* rename Kabupaten Pontianak to Mempawah (name change in 2014)
	// Note: Kota Pontianak is A/B district, kabupaten Mempawah/Pontianak is IE district
	replace namakabupaten = "MEMPAWAH" if namakabupaten == "PONTIANAK"
	tempfile treatments
	save `treatments'
	restore

	* abbreviate province names to match treatment assignment file
	replace provinsi = "KEP. BANGKA BELITUNG" if provinsi == "KEPULAUAN BANGKA BELITUNG"
	replace provinsi = "D I YOGYAKARTA" if provinsi == "DI YOGYAKARTA"
	replace provinsi = "NTB" if provinsi == "NUSA TENGGARA BARAT"

	* create kab variable for corresponding districts
	gen kab = 1 if inlist(KABU, 5206, 7502, 3326, 3604)

	* merge treatment assignments
	merge  m:1 provinsi namakabupaten kab using "`treatments'"
	assert _m != 2
	drop _m

	* holdout sample dummy
	gen holdoutsample = finalstratum == 1000


*** Merge in treatment phase
	* load treatment phase dataset
	preserve
	u "$randomization/phases_T_n.dta", clear
	append using "$randomization/phases_C_n.dta"
	keep namaprovinsi namakabupaten treated phase*
	rename namaprovinsi provinsi
	keep if treated != .

	* differentiate between same-name districts
	gen kab = 1 if inlist(namakabupaten, "BIMA", "GORONTALO", "PEKALONGAN", "SERANG")

	* rename Kabupaten Pontianak to Mempawah (name change in 2014)
	// Note: Kota Pontianak is A/B district, kabupaten Mempawah/Pontianak is IE district
	replace namakabupaten = "MEMPAWAH" if namakabupaten == "PONTIANAK"

	* replace phase = 0 if control & set to 0 if missing
	foreach var of varlist phase? {
		replace `var' = 0 if treated == 0
		replace `var' = 0 if `var' == .
	}
	// bysort treated: summ phase?

	tempfile phases
	qui save `phases'
	restore

	* merge phase assignments
	merge m:1 provinsi namakabupaten kab using "`phases'"
	assert _m != 2
	drop _m
	drop kab

/*
** remove treated control districts from experimental sample
	// treated controls identified from: "J-PAL Raskin Transition/05_Intervention/01 Impact Evaluation/20190523_Kab Kota Phase Expansion.xlsx"
	replace treated = . if provinsi == "SUMATERA UTARA"     & namakabupaten == "LANGKAT"
	replace treated = . if provinsi == "KALIMANTAN SELATAN" & namakabupaten == "TABALONG"
	replace treated = . if provinsi == "JAWA TENGAH"        & namakabupaten == "PEKALONGAN"
*/

* dummy for experimental districts
	gen experimental = treated != .

** assert double named districts properly assigned
	foreach dist of numlist 5272 3279 7571 3375 3673 {
		assert experimental == 0 if KABU == `dist'
	}
	foreach dist of numlist 6303 5206 7502 3326 3604 {
		assert experimental == 1 if KABU == `dist'
	}


/*----------------------------------------------------*/
            /* Section: Household Module */
/*----------------------------------------------------*/

*** Rastra variables
	* Rastra receipt
	// tab  R2101
	gen raskin_receipt = R2101 == 1
	// summ raskin_receipt

*** BPNT variables
	* BPNT receipt
	// tab  R2109
	gen bpnt_receipt = R2109 == 1
	// summ bpnt_receipt

	* BPNT use
	// tab  R2110BK2
	gen bpnt_use = ( R2110BK2 == 1 | R2110BK3 == 1 | R2110BK4 == 1 | R2110BK5 == 1)


	* dummy for receiving any Rastra or BPNT
	// all HHs which report Rastra receipt get some rice
	gen receive_subsidy = bpnt_receipt == 1 | raskin_receipt == 1
	// summ receive_subsidy

	* log per capita consumption (monthly)
	// summ EXP_CAP
	gen log_cons = ln(EXP_CAP)

	* per capita consumption, in levels (monthly)
	gen cons = EXP_CAP

	* KKS card
	// tab  R2105
	gen kks = R2105 == 1 | R2105 == 2

	* pkh recipient
	// tab  R2106
	gen pkh_receipt = R2106 == 1

	* where to receive PKH assistance
	// tab  R2107C
	gen pkh_post = R2107C == 1 if pkh_receipt == 1
	gen pkh_atm = R2107C == 2 if pkh_receipt == 1
	gen pkh_bank = R2107C == 3 if pkh_receipt == 1
	gen pkh_agent = R2107C == 4 if pkh_receipt == 1
	gen pkh_other = R2107C == 5 if pkh_receipt == 1
	// summ pkh*

	* online transactions
	// summ online_buy online_sell
	gen online_transact = online_buy + online_sell > 0
	// tab  online_transact

	* any financial transactions fees
	// summ fin_trans_fees
	gen any_fin_trans_fee = fin_trans_fees > 0 & fin_trans_fees != .
	drop fin_trans_fees
	rename any_fin_trans_fee fin_trans_fees
	// tab  fin_trans_fees

	* any cell phone spending
	// summ cell_phone_spend
	gen cell_phone_buy = cell_phone_spend > 0 & cell_phone_spend != .
	// tab  cell_phone_buy
	drop cell_phone_spend

** credit variables
	* receive credit from any source
	// tab  R1901A
	gen credit = R1901A == 1 | R1901B == 1 | R1901C == 1 | R1901D == 1 | R1901E == 1 | ///
		R1901F == 1 | R1901G == 1 | R1901H == 1 | R1901I == 1 | R1901J == 1
	// tab  credit

	* receive credit from KUR
	gen credit_KUR = R1901A == 1
	// tab  credit_KUR

	* receive credit from commercial banks
	gen credit_bank = R1901B == 1
	// tab  credit_bank

	* receive credit from rural credit banks (BPR)
	gen credit_BPR = R1901C == 1
	// tab  credit_BPR

	* receive credit from coop
	gen credit_coop = R1901D == 1
	// tab  credit_coop

	* receive individual loans with interest
	gen credit_loan = R1901E == 1
	// tab  credit_loan

	* receive mortgage loan
	gen credit_mtg = R1901F == 1
	// tab  credit_mtg

	* receive credit from leasing companies
	gen credit_lease = R1901G == 1
	// tab  credit_lease

	* receive credit from KUBE
	gen credit_KUBE = R1901H == 1
	// tab  credit_KUBE

	* receive credit from village owned enterprises
	gen credit_BUMDES = R1901I == 1
	// tab  credit_BUMDES

	* receve credit from other source
	gen credit_other = R1901J == 1
	// tab  credit_other

	* receive social assistance from local govt
	//tab  R2111
	gen local_gov_aid = R2111 == 1

	* amount of social assistance from local govt received
	// summ R2112A_* R2112B_1
	egen local_aid_rp = rowtotal(R2112A_* R2112B_1)
	// bysort local_gov_aid: summ local_aid_rp

	* HH size
	// summ R301
	gen num_in_hh = R301

	* Log HH size
	gen log_num_in_hh = ln(num_in_hh)

	* fraction kids in hh
	gen fraction_kids = num_kids / num_in_hh
	// summ fraction_kids

	* HH head elementary edu or less
	// summ elementary junior_high high_school tertiary
	gen elementary_or_less = elementary
	replace elementary_or_less = 1 if elementary == 0 & junior_high == 0 & high_school == 0 & tertiary == 0
	// summ elementary_or_less


** phk_other_location

	gen phk_other_location=pkh_bank
	replace phk_other_location=1 if pkh_post==1
	replace phk_other_location=1 if pkh_atm==1
	replace phk_other_location=1 if pkh_other==1


/*----------------------------------------------------*/
        /* Section: Drop & label variables  */
/*----------------------------------------------------*/

* drop & rename
	drop R17* R18* R19* R2* NUINFORT1 WI*

// * Save full-sample dataset
// 	save "$cleaned/finance/susenas_mar19_finance_full.dta", replace

* Keep experimental sample only
	keep if experimental == 1
	di _N

** Merge in readiness indices
	* load readiness indices
	preserve
	import excel "$importable/20171016-Perluasan BPNT-V11.xlsx", clear firstrow sheet("514 KabKota")
	keep idkabu Indeks*
	rename idkabu KABU
	tempfile index
	save `index'
	restore

	* merge in index
	merge m:1 KABU using `index'
	assert _m != 1
	drop if _m == 2
	drop _m

	* clean index variables
	foreach var of varlist Indeks* {
		replace `var' = "" if `var' == "n/a"
		destring `var', replace
	}
	rename IndeksFasilitasKesiapanNonTu facilities_index
	rename IndeksInfrastuktur           infra_index
	rename IndeksGabungan               composite_index
	// summ *index

	* compute above median infrastructure dummy
	preserve
	duplicates drop KABU, force
	bysort treated: summ composite_index
	qui summ composite_index, d
	gen hi_comp_index = composite_index > `r(p50)' if composite_index != .
	// summ hi_comp_index
	keep KABU hi_comp_index
	tempfile comp_index
	save `comp_index'
	restore

	merge m:1 KABU using `comp_index', assert(3) nogen
	// summ hi_comp_index


/*----------------------------------------------------*/
                /* Section: Outliers */
/*----------------------------------------------------*/

* variables to be checked for outliers
	local continuous_vars = "local_aid_rp"

** windsorize to 0.5 and 99.5 percentiles of continuous variables
	foreach var of varlist `continuous_vars' {
		_pctile `var', p(0.5 99.5)
		qui gen `var'W = `var'
		replace `var'W = `r(r1)' if `var' < `r(r1)'
		replace `var'W = `r(r2)' if `var' > `r(r2)' & `var' != .
		// summ `var' `var'W
	}

** Remove outliers with z-scores of 12 and above
	* Tag outliers with z-scores 12 and above
		foreach var of varlist `continuous_vars' {
		  qui summ `var'
		  qui gen `var'_z = abs((`var' - `r(mean)') / `r(sd)')
		  // extremes `var'_z `var', n(20) high
		  quietly count if `var'_z >= 12 & `var'_z != .
		  di "`var' has `r(N)' values greater than 12 sd from the mean"
		  qui gen `var'_tag = 1 if `var'_z >= 12 & `var'_z != .
		}

	* set tagged values to missing
	// for now: strictly set all 12> to missing
	foreach var of varlist `continuous_vars' {
		quietly count if `var'_tag == 1

		// if tagged vals in var
		if `r(N)' > 0 {
			// copy original variable
			quietly gen `var'_orig = `var'

			// set tagged values to missing
		  quietly replace `var' = . if `var'_tag == 1
			local num_miss = `r(N)'
			di "`num_miss' values in `var' set to missing"
			extremes `var'_z `var'_orig `var', n(`num_miss')  high
		}
	}

	drop *z *tag
	qui ds *orig
	di "Original versions of vars with outliers removed: `r(varlist)'"

	* label new variables
	foreach var of varlist *W {
		local varname = substr("`var'", 1, length("`var'") - 1)
		local label : var label `varname'
		la var `var' "`label' (Winsorized)"
	}
	foreach var of varlist *_orig {
		local varname = substr("`var'", 1, length("`var'") - 5)
		local label : var label `varname'
		la var `var' "`label' (Raw)"
		local labelnew : var label `var'
	}


/*----------------------------------------------------*/
          /* Section: Merge in baseline DV */
/*----------------------------------------------------*/

** Add in baseline (march 2018) outcome variables
	preserve
	u "$cleaned/finance/susenas_mar18_finance_hh_full.dta", clear
	drop *orig
	rename smallbui_sus19_all TALL

	* collapse outcome variables by district by urban/rural areas
	rename R105 urban
	collapse (mean) internet_use cell_phone* local_gov_aid local_aid_rp* credit* savings_account = any_savings kks pkh* fin_trans_fees	[aw = FWT], by(KABU urban)


	rename *              *_mar18
	rename KABU_mar18     KABU
	rename urban_mar18    urban
	// summ
	tempfile mar_2018
	save `mar_2018'
	restore

	* merge in baseline DVs
	rename R105 urban
	merge m:1 KABU urban using `mar_2018'
	assert _m != 1
	keep if _m == 3
	drop _m

** Save experimental HHs dataset
	recode urban (2=0)
	// summ
	save "$cleaned/finance/susenas_mar19_finance_treats.dta", replace
	// u "$cleaned/finance/susenas_mar19_finance_treats.dta", clear


/*----------------------------------------------------*/
            /* Section: Individual dataset */
/*----------------------------------------------------*/

** Create kabupaten identifier
	u `ind_dataset', clear
	tostring R101, gen(R101S)
	tostring R102, gen(R102S)
	gen KABU = R101S + "0" + R102S if length(R102S) == 1
	replace KABU = R101S + R102S if length(R102S) > 1
	assert length(KABU) == 4
	destring KABU, replace
	drop R101S R102S

** Merge in kabupaten/province names
	preserve
	import excel "${importable}/province_names.xlsx", firstrow clear
	rename KodeProvinsi R101
	rename KodeKabupaten R102
	destring R101, replace
	destring R102, replace
	rename NamaKabupaten namakabupaten
	rename NamaProvinsi provinsi
	tempfile kabu_names
	save `kabu_names'
	restore

	merge  m:1 R101 R102 using "`kabu_names'", assert(3) nogen

** merge in treatment and strata
	preserve
	u "$cleaned/finance/susenas_mar19_finance_treats.dta", clear
	duplicates drop KABU, force
	keep KABU treated experimental finalstratum phase*
	tempfile treatments
	qui save `treatments'
	restore

	merge m:1 KABU using `treatments'
	assert _m != 2
	drop _m
	keep if experimental == 1

** merge in march 2018 baseline controls
	merge m:1 R101 R102 R105 using "$cleaned/finance/susenas_mar18_finance_ind.dta"
	assert _m != 1
	drop if _m == 2
	drop _m
	// summ *mar18

* save
	rename *_ind* **
	recode R105 (2=0), gen(urban)
	save "$cleaned/finance/susenas_mar19_finance_ind.dta", replace
	// u "$cleaned/finance/susenas_mar19_finance_ind.dta", clear

// DONE
cap log close
